/*==============================================================================
step0650 - collapse World Values Survey.do

Outline
1) merge NUTS 2006 to WVS to NUTS 2010 crosswalk
2) collapse average answer to question by country and region 
(i.e. NUTS1, NUTS2, NUTS3, U.S. region, Canada region)  

Note: There are two main data files containing WVS/EVS:
"wvs1981_2008_v20090914.dta"
"with-NUTS/ZA4804_v2-0-0.dta"

==============================================================================*/

clear all
set maxvar 32767
set more off

* ------------------------------------------------------------------------------
* Load the EVS file that carries NUTS codes and cut it down to the sample
* ------------------------------------------------------------------------------

* The raw file is stored compressed, so unpack it before loading.
! uncompress "$scratch/WVS/with-NUTS/ZA4804_v2-0-0.dta.Z"
use "$scratch/WVS/with-NUTS/ZA4804_v2-0-0.dta"

* Give the survey variables used throughout this file readable names.
rename s009  country
rename s017  weight
rename x048  region
rename x048a region_nuts1
rename x048b region_nuts2
rename x048c region_nuts3

* Constant that is counted per cell in the collapses further down.
generate count = 1

* Recode country codes: both GB codes become UK, and GR becomes EL.
replace country = "UK" if inlist(country, "GB-GBN", "GB-NIR")
replace country = "EL" if country == "GR"

* Flag the countries that belong to the sample and keep only those.
* inlist() accepts at most 10 string arguments, hence the two calls.
generate insample = inlist(country, "AT", "BE", "CA", "CH", "DE", "DK", "ES") ///
	| inlist(country, "FI", "FR", "IT", "NL", "SE", "UK", "US")
keep if insample == 1

* Keep the 2008 EVS wave only.
keep if s002evs == 4 //nuts codes only available in 2008 EVS

* Drop respondents that carry no geographic information at any level.
drop if region == .b ///
	& (region_nuts1 == .a | region_nuts1 == .d) ///
	& (region_nuts2 == .a | region_nuts2 == .d) ///
	& (region_nuts3 == .a | region_nuts3 == .d)

* Park the cleaned respondent file; the crosswalk merge reloads it per level.
tempfile merge
save `merge.dta'

* Attach NUTS 2010 codes to the respondents, one pass per NUTS level (1 and 2).
foreach lvl of numlist 1/2 {

	use `merge.dta', clear

	* The crosswalk is keyed on the WVS region code of the current level.
	rename region_nuts`lvl' wvs_code

	* NOTE(review): this is an m:m merge. Confirm that wvs_code identifies
	* crosswalk rows closely enough that an m:1 merge would not be the
	* safer choice here.
	merge m:m wvs_code using "$dta_files/nuts_2006_2010_wvs_crosswalk.dta", ///
		generate(_merge_check_nuts`lvl') noreport

	* Keep only the rows matched to crosswalk entries of the current level.
	keep if nuts_level == `lvl'

	* Note: even though a region has a WVS code, a number of regions have no
	* survey respondents, so unmatched crosswalk rows are expected here.
	tabulate _merge_check_nuts`lvl'

	generate nuts`lvl' = code2010

	tempfile nuts`lvl'_merge
	save `nuts`lvl'_merge.dta'
}

* The level-2 file from the last pass is still in memory; stack the level-1
* file underneath it.
quietly append using `nuts1_merge.dta'

/*
Regions that split between NUTS 2006 and NUTS 2010.
Only the following 14 codes are affected: FI1B, FI1C, UKD62, UKD63,
UKE44, UKE45, UKF24, UKF25, UKG36, UKG37, UKG38, UKG39, UKH24, UKH25.

Before the collapse, all WVS responses are tied to just one of the two
regions that result from a split (e.g. FI1B). After the collapse, the record
is duplicated so that one copy stands for the first region (e.g. FI1B) and
the other copy stands for the second region (e.g. FI1C).
*/

replace nuts2 = "FI1B" if nuts2 == "FI1C"

* Respondent-level file used by the NUTS-level and country-level collapses.
tempfile wvs
save `wvs.dta'

*NUTS code collapse, European Countries Only

* The survey variables are handled in four batches. Each batch is collapsed
* on its own and the four pieces are merged back together afterwards.
* These locals are defined at do-file level, so later sections can use them.
local variables1 s016 a0* a1* b0* c0* 
local variables2 d0* e0* e1* f0* f1* 
local variables3 g0* u0* v0* 
local variables4 w0* x001-x047a x049-y002

forval n=1/2 {
	forval x=1/4 {

		* Load only the respondents that have a NUTS code at this level.
		use count s020 weight nuts`n' country `variables`x'' using `wvs.dta' ///
		if nuts`n'!="", clear

		* For every variable with at least one non-missing value, create one
		* indicator per answer category and rename it from the _I prefix to
		* the freq_ prefix. The indicators are summed in the collapse below.
		foreach var of varlist `variables`x'' {

			quietly sum `var' 

			if r(N)!=0 {	
				xi i.`var', noomit 

				foreach freq_var of varlist _I* {
					local newname = substr("`freq_var'",3,20)
					rename `freq_var' freq_`newname'
				}
			}
		}

		* Remember the variable labels, falling back to the variable name,
		* so they can be put back after the collapse.
		foreach v of var `variables`x'' {
			local l`v' : variable label `v'
			if `"`l`v''"' == "" {
				local l`v' "`v'"
			}
		}

		* Unweighted number of respondents in each region.
		bys nuts`n': gen sample_size = _N 

		collapse (mean) `variables`x'' (sum) freq* (first) country s020 /// 
		sample_size (count) count [pw=weight], by(nuts`n') fast

		foreach v of var `variables`x'' {
			label var `v' "`l`v''"
		}

		sort country nuts`n'	
		tempfile collapse_nuts`n'_`x'
		save `collapse_nuts`n'_`x'.dta' 

	}
}

* Put the four batches back together, one output file per NUTS level.
forval n=1/2 {

	use `collapse_nuts`n'_1.dta', clear

	forval x=2/4 {
		* Merge on the full key name. The earlier key was the bare stub nuts,
		* which is not a variable in these files and only resolved through
		* variable-name abbreviation.
		merge 1:1 nuts`n' using `collapse_nuts`n'_`x'.dta', nogen assert(3)
	}

	save "$dta_files/wvs_collapse_nuts`n'.dta", replace
}

*Collapse Average Response for Entire Country, European Countries

* No full load of the respondent file is needed before the loop: every pass
* below reloads exactly the columns it needs and clears memory first.

forval x=1/4 {

	* Means and counts at country level are for all observations with
	* NUTS 1 reported.
	use country count nuts1 s020 weight `variables`x'' using `wvs.dta' if nuts1!="", clear

	* For every variable with at least one non-missing value, create one
	* indicator per answer category and rename it from the _I prefix to the
	* freq_ prefix. The indicators are summed in the collapse below.
	foreach var of varlist `variables`x'' {

		quietly sum `var' 

		if r(N)!=0 {	
			xi i.`var', noomit 

			foreach freq_var of varlist _I* {
				local newname = substr("`freq_var'",3,20)
				rename `freq_var' freq_`newname'
			}
		}
	}

	* Remember the variable labels, falling back to the variable name, so
	* they can be put back after the collapse.
	foreach v of var `variables`x'' {
		local l`v' : variable label `v'
		if `"`l`v''"' == "" {
			local l`v' "`v'"
		}
	}

	* Unweighted number of respondents in each country.
	bys country: gen sample_size = _N

	collapse (mean) `variables`x'' (sum) freq* (first) s020 sample_size /// 
	(count) count [pw=weight], by(country) fast

	foreach v of var `variables`x'' {
		label var `v' "`l`v''"
	}

	sort country	
	tempfile collapse_eu_country_`x'
	save `collapse_eu_country_`x'.dta'
}

use `collapse_eu_country_1.dta', clear

* Reference the batch files exactly the way they were saved above, so the
* save and the merge always resolve to the same path.
forval x=2/4 {
	merge 1:1 country using `collapse_eu_country_`x'.dta', nogen assert(3)
}	

sort country 
save "$dta_files/wvs_collapse_eu_country.dta", replace

*Collapse Average Response for Entire Country, U.S. & Canada

* The longitudinal WVS file is stored compressed, so unpack it before loading.
! uncompress "$scratch/WVS/wvs1981_2008_v20090914.dta.Z"
use "$scratch/WVS/wvs1981_2008_v20090914.dta", clear

rename s009 country
rename x048 region
rename s017 weight
gen count=1

* Keep U.S. and Canadian respondents with s020 equal to 2006 only.
keep if (country=="US"|country=="CA") & s020==2006

* Fold the two separately coded states into region 840009.
replace region = 840009 if region== 840011 //alaska coded separately in 2006 WVS
replace region = 840009 if region== 840012 //Hawaii coded separately in 2006 WVS

* Respondent-level file used by the country-level and region-level collapses.
tempfile wvs2
save `wvs2.dta'

* Batches of survey variables for the U.S. and Canada. These replace the
* batch definitions used for the European files.
local variables1 s016 a0* a1* b0* c0* 
local variables2 d0* e0* e1* f0* f1* 
local variables3 g0* // note: no u0* or v0* variables for US & Canada
local variables4 x0* // note: no w0* variables for US & Canada

forval x=1/4 {

	use count country weight s020 `variables`x'' using `wvs2.dta', clear

	* For every variable with at least one non-missing value, create one
	* indicator per answer category and rename it from the _I prefix to the
	* freq_ prefix. The indicators are summed in the collapse below.
	foreach var of varlist `variables`x'' {

		quietly sum `var' 

		if r(N)!=0 {	
			xi i.`var', noomit 

			foreach freq_var of varlist _I* {
				local newname = substr("`freq_var'",3,20)
				rename `freq_var' freq_`newname'
			}
		}
	}

	* Remember the variable labels, falling back to the variable name, so
	* they can be put back after the collapse.
	foreach v of var `variables`x'' {
		local l`v' : variable label `v'
		if `"`l`v''"' == "" {
			local l`v' "`v'"
		}
	}		

	* Unweighted number of respondents in each country.
	bys country: gen sample_size = _N

	collapse (mean) `variables`x'' (sum) freq* (first) s020 sample_size (count) ///
	count [pw=weight], by(country) fast

	foreach v of var `variables`x'' {
		label var `v' "`l`v''"
	}	

	sort country	
	tempfile collapse_us_ca_country_`x'
	save `collapse_us_ca_country_`x'.dta'
	clear

}

use `collapse_us_ca_country_1.dta', clear

* Reference the batch files exactly the way they were saved above, so the
* save and the merge always resolve to the same path.
forval x=2/4 {
	merge 1:1 country using `collapse_us_ca_country_`x'.dta', nogen assert(3)
}	

sort country 
save "$dta_files/wvs_collapse_us_ca_country.dta", replace

*by region, for United States  & Canada

* Respondents without a region code are excluded when each batch is loaded.
* The exclusion used to be applied to a copy of the data that every loop pass
* then discarded by reloading the file, so it never took effect and the
* missing-region respondents ended up as a group of their own in the output.
* The recode of regions 840011 and 840012 into 840009 is not repeated here
* because it is applied before the respondent file is saved.

forval x=1/4 {

	use count country region s020 weight `variables`x'' if region!=. ///
	using `wvs2.dta', clear

	* For every variable with at least one non-missing value, create one
	* indicator per answer category and rename it from the _I prefix to the
	* freq_ prefix. The indicators are summed in the collapse below.
	foreach var of varlist `variables`x'' {

		quietly sum `var' 

		if r(N)!=0 {	
			xi i.`var', noomit 

			foreach freq_var of varlist _I* {
				local newname = substr("`freq_var'",3,20)
				rename `freq_var' freq_`newname'
			}
		}
	}

	* Remember the variable labels, falling back to the variable name, so
	* they can be put back after the collapse.
	foreach v of var `variables`x'' {
		local l`v' : variable label `v'
		if `"`l`v''"' == "" {
			local l`v' "`v'"
		}
	}

	* Unweighted number of respondents in each region.
	bys region: gen sample_size = _N

	collapse (mean) `variables`x'' (sum) freq* (first) country s020 sample_size /// 
	(count) count [pw=weight], by(region) fast

	foreach v of var `variables`x'' {
		label var `v' "`l`v''"
	}

	sort country region
	tempfile collapse_us_ca_region_`x'
	save `collapse_us_ca_region_`x'.dta'

}

use `collapse_us_ca_region_1.dta', clear

* Reference the batch files exactly the way they were saved above, so the
* save and the merge always resolve to the same path.
forval x=2/4 {
	merge 1:1 country region using `collapse_us_ca_region_`x'.dta', nogen assert(3)
}	
save "$dta_files/wvs_collapse_us_ca_region.dta", replace

* Re-compress both raw files. The second path names the same file that was
* unpacked at the top of this do-file, with a single .dta extension.
! compress "$scratch/WVS/wvs1981_2008_v20090914.dta"
! compress "$scratch/WVS/with-NUTS/ZA4804_v2-0-0.dta"
